https://stats.stackexchange.com/questions/96972/how-to-interpret-parameters-in-glm-with-family-gamma http://rstudio-pubs-static.s3.amazonaws.com/5691_192685385fc445c9b3fb1619960a20e2.html
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(glue) # for pretty printing
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
library(tictoc) # for cell/function profiling
projects = read.csv("../../data/processed/gofundme_projects_clean.csv")
projects$month = factor(projects$month)
projects$day_of_week = factor(projects$day_of_week)
In total, we have N=5223 IID samples to work with.
Removed all random variabes except year because they didn’t help explain any variance in the data beyond what the residuals could capture. Year is a reasonable random effect as well (see Variance Components, Searle et al 2006)
Nesting and Chi2 differences: https://www.psychologie.uzh.ch/dam/jcr:ffffffff-b371-2797-0000-00000fda8f29/chisquare_diff_en.pdf
upper_limit = quantile(projects$mean_donation, 0.995) # remove the top 0.5% of projects, eventually create models with these data included
projects.trunc = projects[projects$mean_donation < upper_limit, ]
projects.trunc %>%
ggplot() + labs(title="Mean Donation Density") +
geom_density(aes(mean_donation))
projects.trunc %>%
ggplot() + geom_qq(aes(sample=mean_donation), distribution = qexp) + geom_qq_line(aes(sample=mean_donation), distribution = qexp)
projects.trunc %>%
ggplot(aes(goal, mean_donation)) + labs(title="Goal Amount Distribution") +
geom_point(aes(alpha=0.1)) +
theme_minimal()
projects.trunc %>%
ggplot(aes(duration_float, mean_donation)) + labs(title="Duration Distribution") +
geom_point(aes(alpha=0.1)) +
theme_minimal()
projects.trunc %>%
ggplot(aes(text_length_words, mean_donation)) + labs(title="Text Length Distribution") +
geom_point(aes(alpha=0.1)) +
theme_minimal()
projects.trunc %>%
ggplot(aes(photos, mean_donation)) + labs(title="Photos Distribution") +
geom_point(aes(alpha=0.1)) +
theme_minimal()
projects.trunc %>%
ggplot(aes(updates, mean_donation)) + labs(title="Updates Distribution") +
geom_point(aes(alpha=0.1)) +
theme_minimal()
projects.trunc %>%
ggplot(aes(friends, mean_donation)) + labs(title="FB Friends Distribution") +
geom_point(aes(alpha=0.1)) +
theme_minimal()
projects.trunc %>%
ggplot(aes(shares, mean_donation)) + labs(title="FB Shares Distribution") +
geom_point(aes(alpha=0.1)) +
theme_minimal()
projects.trunc %>%
ggplot(aes(cancer_type, mean_donation)) + labs(title="Cancer Types") +
geom_boxplot() +
theme_minimal() +
theme(axis.text.x=element_text(angle = 60, hjust=1))
The data look like a Gamma, so we model with Gamma and a log link (see above answer for reason). It might be nice
Ideally, I could run a distributional test and confirm (or reject) this intuition.
Establish the base formula to build models off of:
base.formula = mean_donation ~ shares_sc + friends_sc + updates_sc + photos_sc + goal_sc + text_length_words_sc + duration_float_sc + cancer_type + month + day_of_week + (1|year)
Create a helper function to easily compare two models and pretty print the output:
compare = function(model.old, model.new, key, multiplier=1.0) {
a = anova(model.old, model.new)
p = a$`Pr(>Chisq)`[2]
print(glue("Variable: {key}"))
print(glue("DF diff: {a$`Chi Df`[2]}\t New AIC: {round(a$AIC[2], 1)}\t\t Chisq: {round(a$Chisq[2],1)}\t P(>chisq) = {round(p,4)} {if (p < 0.05) '(significant)' else ''})"))
effect = as.numeric(summary(mod)$coefficients[key, ])
print(glue("beta: {round(effect[1],4)} (+/- {round(effect[2],3)})\t stat: {round(effect[3], 2)} => P(>|z|) = {round(effect[4], 4)}"))
print(glue("rate ratio: {round((exp(effect[1]*multiplier) - 1)*100, 1)}% ({round((exp((effect[1] - effect[2])*multiplier) - 1)*100, 1)}% to {round((exp((effect[1] + effect[2])*multiplier) - 1)*100, 1)}%)"))
print(glue(""))
}
https://stats.stackexchange.com/questions/96972/how-to-interpret-parameters-in-glm-with-family-gamma
Key to interpretation is the log link used here
To put the results from the continuous predictors on interpretable scales, we use a multiplier within the exponent (see factor in compare()). For example, if the multiplier is 1, this leads to: for a one unit increase in this predictor, we see an x-fold increase in the DV.
p = function(var, scl=1, digits=6, mult=1.00, mask=T) {
mu = mean(projects[mask, var])
sd = sd(projects[mask, var])
print(glue("Var: {var}\t Mean: {round(mu,digits)*scl}\t SD: {round(sd,digits)*scl}\t Scale (x{round(mult, 2)}): {paste(round(mult*sd + mu, digits)*scl)}"))
}
p("journey_salience", scl = 1000, digits = 5, mult = 0.6)
## Var: journey_salience Mean: 0.52 SD: 1.68 Scale (x0.6): 1.53
p("battle_salience", scl = 1000, digits = 5, mult = 0.16)
## Var: battle_salience Mean: 3.87 SD: 6.27 Scale (x0.16): 4.87
p("journey_prod", digits = 4, mask = projects$journey_metaphor > 0)
## Var: journey_prod Mean: 1.5567 SD: 1.0602 Scale (x1): 2.6169
p("battle_prod", digits = 4, mask = projects$battle_metaphor > 0)
## Var: battle_prod Mean: 3.4581 SD: 3.0339 Scale (x1): 6.492
p("first_instantiation", digits = 4, mask = projects$journey_metaphor > 0, mult = 1.6)
## Var: first_instantiation Mean: 0.3943 SD: 0.3191 Scale (x1.6): 0.9049
p("first_instantiation", digits = 4, mask = projects$battle_metaphor > 0, mult = 1.6)
## Var: first_instantiation Mean: 0.3952 SD: 0.3111 Scale (x1.6): 0.8929
Examine: does having any instance of metaphor influence mean donation?
Null: having any metaphor is not significantly different from having no metaphor
projects.trunc %>%
ggplot() + theme_minimal() +
geom_violin(aes(any_metaphor, mean_donation, fill=any_metaphor)) +
scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) +
geom_hline(yintercept = mean(projects.trunc[projects.trunc$any_metaphor == F, ]$mean_donation), lty=2, color="red") +
geom_hline(yintercept = mean(projects.trunc[projects.trunc$any_metaphor, ]$mean_donation), lty=2, color="blue")
mean.mod.base = glmer(base.formula, data = projects.trunc, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0321244
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula, ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - photos_sc)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - month)
# drop1(glmer(f, data = projects.trunc, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00125067
## (tol = 0.001, component 1)
# summary(mean.mod)
# m2 = glm(status ~ shares_sc + friends_sc + updates_sc + goal_sc + text_length_words_sc + cancer_type + scale(journey_salience), data = projects.trunc.jor, family = "binomial")
#
# m1 = glm(status ~ shares_sc + friends_sc + updates_sc + goal_sc + text_length_words_sc + cancer_type, data = projects.trunc.jor, family = "binomial")
# anova(m1,m2, test="Chisq")
# d = data.frame(name = names(coef(m2)), mu = as.numeric(coef(m2)))
# d$LB = ci[as.character(d$name), 1]
# d$UB = ci[as.character(d$name), 2]
# d$sig = 0 > d$UB | 0 < d$LB
# d = d[d$name != "(Intercept)", ]
# d
# ggplot(d) +
# geom_bar(aes(x=name, y=mu, fill=sig), stat="identity") +
# geom_point(aes(x=name, y=LB), color="red") +
# geom_point(aes(x=name, y=UB), color="blue") +
# coord_flip()
mod = glmer(update(f, ~ . + any_metaphor), data = projects.trunc, family = Gamma(link = "log"))
compare(mean.mod, mod, "any_metaphorTRUE")
## Variable: any_metaphorTRUE
## DF diff: 1 New AIC: 52318.9 Chisq: 39.9 P(>chisq) = 0 (significant))
## beta: 0.0802 (+/- 0.013) stat: 6.34 => P(>|z|) = 0
## rate ratio: 8.4% (7% to 9.7%)
mod = glmer(update(base.formula, ~ . + any_metaphor), data = projects.trunc, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0422402
## (tol = 0.001, component 1)
compare(mean.mod, mod, "any_metaphorTRUE")
## Variable: any_metaphorTRUE
## DF diff: 20 New AIC: 52337.2 Chisq: 59.7 P(>chisq) = 0 (significant))
## beta: 0.08 (+/- 0.013) stat: 6.32 => P(>|z|) = 0
## rate ratio: 8.3% (7% to 9.7%)
Examine: does having both metaphor families present influence mean donation?
Null: having both metaphor families is not significantly different from having no metaphor
projects.trunc.both = projects.trunc[(1-projects.trunc$any_metaphor) | projects.trunc$both_metaphor, ] # only projects with both or none
N = 2790
projects.trunc.both %>%
ggplot() + theme_minimal() +
geom_violin(aes(both_metaphor, mean_donation, fill=both_metaphor)) +
scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) +
geom_hline(yintercept = mean(projects.trunc.both[projects.trunc.both$both_metaphor == F, ]$mean_donation), lty=2, color="red") +
geom_hline(yintercept = mean(projects.trunc.both[projects.trunc.both$both_metaphor, ]$mean_donation), lty=2, color="blue")
Perform model selection to prevent overfitting:
mean.mod.base = glmer(base.formula, data = projects.trunc.both, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0414938
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula, ~ . - month)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - photos_sc)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.both, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.both, family = Gamma(link = "log"))
# summary(mean.mod)
mod = glmer(update(f, ~ . + both_metaphor), data = projects.trunc.both, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00146278
## (tol = 0.001, component 1)
compare(mean.mod, mod, "both_metaphorTRUE")
## Variable: both_metaphorTRUE
## DF diff: 1 New AIC: 27989.8 Chisq: 7.7 P(>chisq) = 0.0054 (significant))
## beta: 0.0709 (+/- 0.026) stat: 2.77 => P(>|z|) = 0.0056
## rate ratio: 7.4% (4.6% to 10.1%)
mod = glmer(update(base.formula, ~ . + both_metaphor), data = projects.trunc.both, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0380102
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "both_metaphorTRUE")
## Variable: both_metaphorTRUE
## DF diff: 1 New AIC: 28016.8 Chisq: 8.6 P(>chisq) = 0.0033 (significant))
## beta: 0.0747 (+/- 0.026) stat: 2.91 => P(>|z|) = 0.0037
## rate ratio: 7.8% (5% to 10.6%)
Examine: does having only journey/battle, or dominant journey/battle, influence mean donation?
Null: having only journey/battle, or dominant journey/battle, is not significantly different from negative
If journey is not the dominant metaphor, that means battle is or there are equal amounts of each family If journey is not the only metaphor family, that means there is at least some battle metaphor
Dominant Journey: Null: There is no difference in outcomes when journey dominates compared to when battle dominates or there is no dominant family.
Dominant Battle: Null: There is no difference in outcomes when battle dominates compared to when journey dominates or there is no dominant family.
Only journey: Null: There is no difference in outcomes when journey is the only metaphor family present compared to when there is also battle metaphor.
projects.trunc.some = projects.trunc[projects.trunc$any_metaphor, ]
N = 2842
p1 = projects.trunc.some %>%
ggplot() + theme_minimal() +
geom_violin(aes(dom_journey, mean_donation, fill=dom_journey)) +
scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_journey == F, ]$mean_donation), lty=2, color="red") +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_journey, ]$mean_donation), lty=2, color="blue")
p2 = projects.trunc.some %>%
ggplot() + theme_minimal() +
geom_violin(aes(dom_battle, mean_donation, fill=dom_battle)) +
scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_battle == F, ]$mean_donation), lty=2, color="red") +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$dom_battle, ]$mean_donation), lty=2, color="blue")
p3 = projects.trunc.some %>%
ggplot() + theme_minimal() +
geom_violin(aes(only_journey, mean_donation, fill=only_journey)) +
scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_journey == F, ]$mean_donation), lty=2, color="red") +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_journey, ]$mean_donation), lty=2, color="blue")
p4 = projects.trunc.some %>%
ggplot() + theme_minimal() +
geom_violin(aes(only_battle, mean_donation, fill=only_battle)) +
scale_fill_manual(values = c("FALSE" = alpha("red", 0.5), "TRUE" = alpha("blue", 0.5)), guide=F) +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_battle == F, ]$mean_donation), lty=2, color="red") +
geom_hline(yintercept = mean(projects.trunc.some[projects.trunc.some$only_battle, ]$mean_donation), lty=2, color="blue")
grid.arrange(p1, p2, p3, p4, nrow=1)
Perform model selection to prevent overfitting:
mean.mod.base = glmer(base.formula, data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.051572
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula, ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.some, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.some, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0412533
## (tol = 0.001, component 1)
# summary(mean.mod)
mod = glmer(update(f, ~ . + dom_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0623054
## (tol = 0.001, component 1)
compare(mean.mod, mod, "dom_journeyTRUE")
## Variable: dom_journeyTRUE
## DF diff: 1 New AIC: 28671.9 Chisq: 1.4 P(>chisq) = 0.2288 )
## beta: 0.0301 (+/- 0.025) stat: 1.2 => P(>|z|) = 0.2297
## rate ratio: 3.1% (0.5% to 5.7%)
mod = glmer(update(f, ~ . + dom_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.064289
## (tol = 0.001, component 1)
compare(mean.mod, mod, "dom_battleTRUE")
## Variable: dom_battleTRUE
## DF diff: 1 New AIC: 28672.8 Chisq: 0.6 P(>chisq) = 0.4524 )
## beta: -0.0162 (+/- 0.021) stat: -0.75 => P(>|z|) = 0.4513
## rate ratio: -1.6% (-3.7% to 0.5%)
mod = glmer(update(f, ~ . + only_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0617672
## (tol = 0.001, component 1)
compare(mean.mod, mod, "only_journeyTRUE")
## Variable: only_journeyTRUE
## DF diff: 1 New AIC: 28672.7 Chisq: 0.7 P(>chisq) = 0.4037 )
## beta: 0.023 (+/- 0.027) stat: 0.84 => P(>|z|) = 0.4002
## rate ratio: 2.3% (-0.4% to 5.2%)
mod = glmer(update(f, ~ . + only_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0621089
## (tol = 0.001, component 1)
compare(mean.mod, mod, "only_battleTRUE")
## Variable: only_battleTRUE
## DF diff: 1 New AIC: 28672.8 Chisq: 0.6 P(>chisq) = 0.4495 )
## beta: -0.0143 (+/- 0.019) stat: -0.76 => P(>|z|) = 0.4443
## rate ratio: -1.4% (-3.2% to 0.4%)
mod = glmer(update(base.formula, ~ . + dom_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.062997
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "dom_journeyTRUE")
## Variable: dom_journeyTRUE
## DF diff: 1 New AIC: 28681.6 Chisq: 1.5 P(>chisq) = 0.223 )
## beta: 0.0307 (+/- 0.025) stat: 1.22 => P(>|z|) = 0.2212
## rate ratio: 3.1% (0.6% to 5.7%)
mod = glmer(update(base.formula, ~ . + dom_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.075027
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "dom_battleTRUE")
## Variable: dom_battleTRUE
## DF diff: 1 New AIC: 28682.5 Chisq: 0.5 P(>chisq) = 0.4672 )
## beta: -0.0155 (+/- 0.021) stat: -0.72 => P(>|z|) = 0.4713
## rate ratio: -1.5% (-3.6% to 0.6%)
mod = glmer(update(base.formula, ~ . + only_journey), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0546125
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "only_journeyTRUE")
## Variable: only_journeyTRUE
## DF diff: 1 New AIC: 28682.4 Chisq: 0.7 P(>chisq) = 0.4044 )
## beta: 0.0232 (+/- 0.027) stat: 0.85 => P(>|z|) = 0.3964
## rate ratio: 2.3% (-0.4% to 5.2%)
mod = glmer(update(base.formula, ~ . + only_battle), data = projects.trunc.some, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0547341
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "only_battleTRUE")
## Variable: only_battleTRUE
## DF diff: 1 New AIC: 28682.6 Chisq: 0.5 P(>chisq) = 0.4817 )
## beta: -0.0131 (+/- 0.019) stat: -0.71 => P(>|z|) = 0.4807
## rate ratio: -1.3% (-3.1% to 0.6%)
projects.trunc.jor = projects.trunc[projects.trunc$journey_metaphor > 0, ]
N = 693
p1 = projects.trunc.jor %>%
ggplot(aes(scale(journey_salience), mean_donation)) + theme_minimal() +
geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")
p2 = projects.trunc.jor %>%
ggplot(aes(scale(journey_prod), mean_donation)) + theme_minimal() +
geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")
p3 = projects.trunc.jor %>%
ggplot(aes(scale(first_instantiation), mean_donation)) + theme_minimal() +
geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")
grid.arrange(p1, p2, p3, nrow=1)
Perform model selection to prevent overfitting:
mean.mod.base = glmer(base.formula, data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00287654
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula, ~ . - text_length_words_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - updates_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - cancer_type)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - photos_sc)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - month)
# drop1(glmer(f, data = projects.trunc.jor, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.jor, family = Gamma(link = "log"))
# summary(mean.mod)
mod = glmer(update(f, ~ . + scale(journey_salience)), data = projects.trunc.jor, family = Gamma(link = "log"))
compare(mean.mod, mod, "scale(journey_salience)", 0.6)
## Variable: scale(journey_salience)
## DF diff: 1 New AIC: 6954.3 Chisq: 2.3 P(>chisq) = 0.1319 )
## beta: 0.0223 (+/- 0.015) stat: 1.49 => P(>|z|) = 0.1349
## rate ratio: 1.3% (0.4% to 2.3%)
mod = glmer(update(f, ~ . + scale(journey_prod)), data = projects.trunc.jor, family = Gamma(link = "log"))
compare(mean.mod, mod, "scale(journey_prod)")
## Variable: scale(journey_prod)
## DF diff: 1 New AIC: 6948.5 Chisq: 8.1 P(>chisq) = 0.0045 (significant))
## beta: 0.0422 (+/- 0.015) stat: 2.79 => P(>|z|) = 0.0052
## rate ratio: 4.3% (2.7% to 5.9%)
mod = glmer(update(f, ~ . + scale(first_instantiation)), data = projects.trunc.jor, family = Gamma(link = "log"))
compare(mean.mod, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1 New AIC: 6955.5 Chisq: 1 P(>chisq) = 0.3109 )
## beta: -0.0154 (+/- 0.015) stat: -1.01 => P(>|z|) = 0.3103
## rate ratio: -1.5% (-3% to 0%)
mod = glmer(update(base.formula, ~ . + scale(journey_salience)), data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00422534
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(journey_salience)", 0.6)
## Variable: scale(journey_salience)
## DF diff: 1 New AIC: 6987 Chisq: 3.5 P(>chisq) = 0.0625 )
## beta: 0.0325 (+/- 0.018) stat: 1.84 => P(>|z|) = 0.0651
## rate ratio: 2% (0.9% to 3%)
mod = glmer(update(base.formula, ~ . + scale(journey_prod)), data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00582552
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(journey_prod)")
## Variable: scale(journey_prod)
## DF diff: 1 New AIC: 6983.3 Chisq: 7.1 P(>chisq) = 0.0076 (significant))
## beta: 0.0434 (+/- 0.016) stat: 2.64 => P(>|z|) = 0.0082
## rate ratio: 4.4% (2.7% to 6.2%)
mod = glmer(update(base.formula, ~ . + scale(first_instantiation)), data = projects.trunc.jor, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00550599
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1 New AIC: 6990 Chisq: 0.5 P(>chisq) = 0.4774 )
## beta: -0.0109 (+/- 0.015) stat: -0.71 => P(>|z|) = 0.4762
## rate ratio: -1.1% (-2.6% to 0.4%)
projects.trunc.bat = projects.trunc[projects.trunc$battle_metaphor > 0, ]
N = 2585
p1 = projects.trunc.bat %>%
ggplot(aes(scale(battle_salience), mean_donation)) + theme_minimal() +
geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")
p2 = projects.trunc.bat %>%
ggplot(aes(scale(battle_prod), mean_donation)) + theme_minimal() +
geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")
p3 = projects.trunc.bat %>%
ggplot(aes(scale(first_instantiation), mean_donation)) + theme_minimal() +
geom_point() + geom_smooth(method = "lm", se = F, color="red") + geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = F, color="blue")
grid.arrange(p1, p2, p3, nrow=1)
Perform model selection to prevent overfitting:
mean.mod.base = glmer(base.formula, data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0698876
## (tol = 0.001, component 1)
# drop1(mean.mod.base, test="Chisq")
f = update(base.formula, ~ . - day_of_week)
# drop1(glmer(f, data = projects.trunc.bat, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - duration_float_sc)
# drop1(glmer(f, data = projects.trunc.bat, family = Gamma(link = "log")), test="Chisq")
f = update(f, ~ . - text_length_words_sc)
# drop1(glmer(f, data = projects.trunc.bat, family = Gamma(link = "log")), test="Chisq")
mean.mod = glmer(f, data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0505245
## (tol = 0.001, component 1)
# summary(mean.mod)
mod = glmer(update(f, ~ . + scale(battle_salience)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0517634
## (tol = 0.001, component 1)
compare(mean.mod, mod, "scale(battle_salience)", 0.16)
## Variable: scale(battle_salience)
## DF diff: 1 New AIC: 26050.9 Chisq: 4.7 P(>chisq) = 0.0306 (significant))
## beta: -0.018 (+/- 0.008) stat: -2.17 => P(>|z|) = 0.03
## rate ratio: -0.3% (-0.4% to -0.2%)
mod = glmer(update(f, ~ . + scale(battle_prod)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0825706
## (tol = 0.001, component 1)
compare(mean.mod, mod, "scale(battle_prod)")
## Variable: scale(battle_prod)
## DF diff: 1 New AIC: 26055.1 Chisq: 0.5 P(>chisq) = 0.4942 )
## beta: 0.0056 (+/- 0.008) stat: 0.68 => P(>|z|) = 0.4955
## rate ratio: 0.6% (-0.3% to 1.4%)
mod = glmer(update(f, ~ . + scale(first_instantiation)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.036784
## (tol = 0.001, component 1)
compare(mean.mod, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1 New AIC: 26055.5 Chisq: 0.1 P(>chisq) = 0.721 )
## beta: 0.0028 (+/- 0.008) stat: 0.34 => P(>|z|) = 0.735
## rate ratio: 0.3% (-0.5% to 1.1%)
mod = glmer(update(base.formula, ~ . + scale(battle_salience)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0506704
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(battle_salience)", 0.16)
## Variable: scale(battle_salience)
## DF diff: 1 New AIC: 26059.9 Chisq: 2.9 P(>chisq) = 0.0875 )
## beta: -0.0154 (+/- 0.009) stat: -1.7 => P(>|z|) = 0.0897
## rate ratio: -0.2% (-0.4% to -0.1%)
mod = glmer(update(base.formula, ~ . + scale(battle_prod)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0475429
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(battle_prod)")
## Variable: scale(battle_prod)
## DF diff: 1 New AIC: 26062.7 Chisq: 0.1 P(>chisq) = 0.7882 )
## beta: 0.0023 (+/- 0.008) stat: 0.27 => P(>|z|) = 0.7845
## rate ratio: 0.2% (-0.6% to 1.1%)
mod = glmer(update(base.formula, ~ . + scale(first_instantiation)), data = projects.trunc.bat, family = Gamma(link = "log"))
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper =
## rep.int(Inf, : failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0438204
## (tol = 0.001, component 1)
compare(mean.mod.base, mod, "scale(first_instantiation)")
## Variable: scale(first_instantiation)
## DF diff: 1 New AIC: 26062.5 Chisq: 0.3 P(>chisq) = 0.5764 )
## beta: 0.0046 (+/- 0.008) stat: 0.56 => P(>|z|) = 0.5749
## rate ratio: 0.5% (-0.4% to 1.3%)